library(readr)
library(lubridate)
library(ggplot2)
# Read the bike rental data set from its hosted CSV file into a tibble.
bike.data <- read_csv(
  file = "http://www.math.montana.edu/ahoegh/teaching/stat408/datasets/Bike.csv"
)
## Parsed with column specification:
## cols(
## datetime = col_datetime(format = ""),
## season = col_double(),
## holiday = col_double(),
## workingday = col_double(),
## weather = col_double(),
## temp = col_double(),
## atemp = col_double(),
## humidity = col_double(),
## windspeed = col_double(),
## casual = col_double(),
## registered = col_double(),
## count = col_double()
## )
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(dplyr)

# Derive calendar year and month from the timestamp and store both as factors.
bike.data <- mutate(
  bike.data,
  year = as.factor(year(datetime)),
  month = as.factor(month(datetime))
)

# Total rentals (the `count` column) for each month, ordered by month.
monthly.counts <- bike.data %>%
  group_by(month) %>%
  summarize(num_bikes = sum(count)) %>%
  arrange(month)

monthly.counts
## # A tibble: 12 x 2
## month num_bikes
## <fct> <dbl>
## 1 1 79884
## 2 2 99113
## 3 3 133501
## 4 4 167402
## 5 5 200147
## 6 6 220733
## 7 7 214617
## 8 8 213516
## 9 9 212529
## 10 10 207434
## 11 11 176440
## 12 12 160160
# Monthly totals of casual and registered rentals. The result has the month
# in its first column and the two summed series in the remaining columns.
bike.counts <- aggregate(
  cbind(bike.data$casual, bike.data$registered),
  by = list(bike.data$month),
  FUN = sum
)

# Stacked bar chart: one bar per month, casual and registered stacked.
# The matrix is transposed so that each column (bar) is a month. Bar labels
# are taken from the aggregate itself so they always line up with the bars.
barplot(
  t(as.matrix(bike.counts[, -1])),
  names.arg = as.character(bike.counts[[1]]),
  xlab = "Month", sub = "Source: www.capitalbikeshare.com",
  ylab = "Bike Rentals",
  main = "Bike Rentals per Month in 2011 - 2012 \n Capital Bikeshare in Washington, DC",
  col = c("darkblue", "red"), legend.text = c("Casual", "Registered"),
  args.legend = list(x = "topleft")
)

# Mean temperature per month. `month` is converted from factor to its integer
# codes so the line layer below can be drawn without an explicit group.
mean_temp <- bike.data %>%
  group_by(month) %>%
  summarize(mean_temp = mean(temp)) %>%
  mutate(month = as.numeric(month))

# Jittered individual temperature readings with the monthly mean overlaid.
ggplot(aes(y = temp, x = month), data = bike.data) +
  geom_jitter(alpha = .1) +
  geom_line(
    inherit.aes = FALSE, aes(y = mean_temp, x = month),
    data = mean_temp, color = "red", lwd = 2
  ) +
  ylab("Average Temp (C)") + xlab("Month") +
  labs(
    title = "Average Temperature in Washington, DC",
    caption = "Source: www.capitalbikeshare.com"
  )

# Consider the number of bike rentals per hour per season
# Treat season as a discrete label so it maps to a discrete x axis and colour.
bike.data$season <- as.character(bike.data$season)

# Distribution of rental counts within each season: faint jittered points,
# a violin outline, and a diamond marker (shape 23) at each season's mean.
# Every `+` sits at the END of a line; a line that starts with `+` would end
# the plot expression early and the remaining layers would never be added.
# NOTE(review): `fun.y` is deprecated in favour of `fun` from ggplot2 3.3.0;
# kept as-is because the ggplot2 version in use is not visible here.
ggplot(bike.data, aes(season, count, color = season)) +
  geom_jitter(alpha = .01) +
  geom_violin() +
  stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2) +
  labs(
    title = "Average number of bike rentals / hour during the four seasons, where 1 = Jan - Mar",
    caption = "Source: Capital BikeShare"
  ) +
  theme_dark()

# Temperature converted with the Celsius-to-Fahrenheit formula (x * 1.8 + 32).
bike.data$tempF <- bike.data$temp * 1.8 + 32
# Scatter plot of rental count against temperature (F). The point colour is
# dark red with a very low alpha (10 of 255) so dense regions show up darker.
plot(
  bike.data$count ~ bike.data$tempF,
  pch = 16,
  col = rgb(100, 0, 0, 10, maxColorValue = 255),
  ylab = "Hourly Bike Rentals",
  xlab = "Temp (F)", sub = "Source: www.capitalbikeshare.com",
  main = "Hourly Bike Rentals by Temperature"
)

# Overlay a loess smooth evaluated on a unit-step grid spanning the observed
# temperature range.
bike.fit <- loess(count ~ tempF, bike.data)
temp.seq <- seq(min(bike.data$tempF), max(bike.data$tempF))
lines(predict(bike.fit, temp.seq) ~ temp.seq, lwd = 2)

# Switch to a 2 x 2 panel layout (filled column by column) and reserve one
# line of outer margin at the bottom for a shared caption.
par(mfcol = c(2, 2), oma = c(1, 0, 0, 0))
# Four panels of rental count against temperature, humidity, windspeed and
# weather category. The multi-panel layout and the bottom outer margin are
# expected to have been set by a preceding par(mfcol = ..., oma = ...) call.

# Recomputed here so this section does not rely on an earlier assignment.
bike.data$tempF <- bike.data$temp * 1.8 + 32

# Panel: temperature, with a loess smooth over the observed range.
plot(
  bike.data$count ~ bike.data$tempF,
  pch = 16, col = rgb(100, 0, 0, 10, maxColorValue = 255),
  ylab = "Hourly Bike Rentals", xlab = "Temp (F)",
  main = "Hourly Bike Rentals by Temperature"
)
bike.fit <- loess(count ~ tempF, bike.data)
temp.seq <- seq(min(bike.data$tempF), max(bike.data$tempF))
lines(predict(bike.fit, temp.seq) ~ temp.seq, lwd = 2)

# Panel: humidity, with a loess smooth over the observed range.
plot(
  bike.data$count ~ bike.data$humidity,
  pch = 16, col = rgb(100, 0, 100, 10, maxColorValue = 255),
  ylab = "Hourly Bike Rentals", xlab = "Humidity (%)",
  main = "Hourly Bike Rentals by Humidity"
)
bike.fit <- loess(count ~ humidity, bike.data)
humidity.seq <- seq(min(bike.data$humidity), max(bike.data$humidity))
lines(predict(bike.fit, humidity.seq) ~ humidity.seq, lwd = 2)

# Panel: windspeed, with a loess smooth over the observed range.
plot(
  bike.data$count ~ bike.data$windspeed,
  pch = 16, col = rgb(0, 0, 100, 10, maxColorValue = 255),
  ylab = "Hourly Bike Rentals", xlab = "Windspeed (MPH)",
  main = "Hourly Bike Rentals by Windspeed"
)
bike.fit <- loess(count ~ windspeed, bike.data)
windspeed.seq <- seq(min(bike.data$windspeed), max(bike.data$windspeed))
lines(predict(bike.fit, windspeed.seq) ~ windspeed.seq, lwd = 2)

# Panel: weather category. A factor on the right-hand side of the formula
# makes plot() draw one box plot per category instead of a scatter plot.
plot(
  bike.data$count ~ as.factor(bike.data$weather),
  col = rgb(0, 100, 0, 255, maxColorValue = 255),
  ylab = "Hourly Bike Rentals", xlab = "Weather Conditions",
  main = "Hourly Bike Rentals by Weather"
)

# Shared caption written into the bottom outer margin of the whole figure.
mtext("Source: www.capitalbikeshare.com", outer = TRUE, cex = .9, side = 1)

# Back to a single panel with no outer margin ...
par(mfcol = c(1, 1), oma = c(0, 0, 0, 0))
# ... then set up two stacked panels for the plots that follow.
par(mfrow = c(2, 1))
# `atemp` converted with the same Celsius-to-Fahrenheit formula used for tempF.
bike.data$reltempF <- bike.data$atemp * 1.8 + 32

# Both histograms use the same x-axis range (0 up to the largest value in
# either series) so the two panels can be compared directly.
temp.xlim <- c(0, max(c(bike.data$reltempF, bike.data$tempF)))

# Density-scaled histograms with Freedman-Diaconis ("FD") bin widths.
hist(
  bike.data$tempF,
  probability = TRUE, breaks = "FD",
  main = "Temperature (F)", col = "red", xlab = "",
  xlim = temp.xlim
)
hist(
  bike.data$reltempF,
  probability = TRUE, breaks = "FD",
  main = "Relative Temperature (F)", col = "orange", xlab = "",
  xlim = temp.xlim
)